In [ ]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "darkgrid" theme to the plots created after this call.
sns.set_theme(style="darkgrid")
In [ ]:
# Read the turn table and discard its 'Unnamed: 0' column.
df = pd.read_csv("manual_turns.csv").drop(columns=['Unnamed: 0'])

# turn_duration = 0.2 * (end_idx - start_idx), evaluated in floating point.
# NOTE(review): 0.2 is presumably the time between two consecutive indices — confirm.
turn_length_in_samples = df['end_idx'].astype('float') - df['start_idx'].astype('float')
df['turn_duration'] = 0.2 * turn_length_in_samples

# Summary statistics, transposed so that each column of df becomes a row.
df.describe().transpose()
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 145.0 1813.000000 643.813616 407.000000 2102.000000 2105.000000 2108.000000 2111.000000
path_num 145.0 2.303448 0.719994 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 145.0 2.027586 1.301414 1.000000 1.000000 2.000000 3.000000 7.000000
start_idx 145.0 520.820690 432.977140 45.000000 245.000000 393.000000 649.000000 2124.000000
end_idx 145.0 585.337931 446.037869 84.000000 289.000000 477.000000 713.000000 2273.000000
walking_direction_lag 145.0 -4.862069 28.262809 -164.000000 -14.000000 0.000000 7.000000 80.000000
walking_direction_base_corr 145.0 0.089897 0.451317 -0.857578 -0.326919 0.210051 0.448113 0.866582
walking_direction_lagged_corr 145.0 0.488734 0.156062 0.178460 0.382983 0.467089 0.607509 0.910992
walking_direction_dtw 145.0 56.049318 34.593451 3.665118 29.714181 46.373857 73.494923 191.612370
speeds_lag 145.0 -0.434483 17.279489 -49.000000 -7.000000 -1.000000 4.000000 92.000000
speeds_base_corr 145.0 0.247639 0.331154 -0.663930 0.040225 0.290227 0.516385 0.836185
speeds_lagged_corr 145.0 0.507480 0.138251 0.184940 0.425878 0.493722 0.605422 0.850110
speeds_dtw 145.0 40.597031 19.204629 10.222585 26.853865 37.784558 50.858723 140.600285
mean_distance 145.0 2.313964 1.488589 0.336612 1.348608 2.010133 3.021977 13.639054
mean_speed_difference 145.0 0.364955 0.123349 0.137744 0.278000 0.340217 0.436231 0.797638
mean_walking_direction_difference 145.0 58.546456 16.733966 18.568244 46.072398 58.550238 68.860939 100.850062
mean_pace_asymmetry 145.0 0.410603 0.112106 0.096129 0.342006 0.395674 0.467422 0.870018
turn_duration 145.0 12.903448 6.792034 3.600000 7.800000 11.800000 15.400000 50.000000
In [ ]:
# Count the turns in df by the sign of each lag column and draw one bar chart
# per column.
#
# The categories are always reported in the fixed order Negative / Zero /
# Positive, and a category with no rows is shown as 0, so the two charts can
# be compared bar by bar. Missing lags are dropped before counting; they are
# not counted as 'Positive'.
lag_sign_counts = {}
for lag_col in ('speeds_lag', 'walking_direction_lag'):
    lag_values = df[lag_col].dropna()
    lag_sign_counts[lag_col] = pd.Series(
        {
            'Negative': int((lag_values < 0).sum()),
            'Zero': int((lag_values == 0).sum()),
            'Positive': int((lag_values > 0).sum()),
        },
        name='count',
    )

# Count for 'speeds_lag'
speeds_lag_counts = lag_sign_counts['speeds_lag']

# Count for 'walking_direction_lag' (the historical variable name is kept so
# that any code relying on it keeps working).
walking_speed_lag_counts = lag_sign_counts['walking_direction_lag']

fig, ax = plt.subplots()
speeds_lag_counts.plot.bar(ax=ax)
ax.set_title("Counts for 'speeds_lag'")
ax.set_xlabel("Speeds Lag Category")
ax.set_ylabel("Count")
plt.show()

fig, ax = plt.subplots()
walking_speed_lag_counts.plot.bar(ax=ax)
ax.set_title("Counts for 'walking_direction_lag'")
ax.set_xlabel("Walking Direction Lag Category")
ax.set_ylabel("Count")
plt.show()
No description has been provided for this image
No description has been provided for this image
In [ ]:
# Box plot of the turn_duration column of df.
turn_duration_ax = sns.boxplot(x=df['turn_duration'])
turn_duration_ax.set_title('Box plot of turn duration')
plt.show()
No description has been provided for this image
In [ ]:
# Divide each DTW value by turn_duration / 0.2.
# NOTE(review): turn_duration / 0.2 presumably recovers the turn length in samples — confirm.
turn_samples = df['turn_duration'] / 0.2
for signal in ('walking_direction', 'speeds'):
    df[f'normalized_{signal}_dtw'] = df[f'{signal}_dtw'] / turn_samples
In [ ]:
# Add the magnitude (absolute value) of each lag column as 'abs_<column>'.
for lag_col in ('walking_direction_lag', 'speeds_lag'):
    df[f'abs_{lag_col}'] = df[lag_col].abs()
In [ ]:
# Columns used by the correlation and distribution cells, in display order.
# Deliberately left out (they were commented out of the original list):
# walking_direction_base_corr, mean_walking_direction_difference,
# speeds_base_corr, mean_speed_difference.
turn_level_features = [
    'turn_duration',
    'mean_distance',
    'mean_pace_asymmetry',
]
walking_direction_features = [
    'walking_direction_lag',
    'abs_walking_direction_lag',
    'walking_direction_dtw',
    'normalized_walking_direction_dtw',
    'walking_direction_lagged_corr',
]
speed_features = [
    'speeds_lag',
    'abs_speeds_lag',
    'speeds_dtw',
    'normalized_speeds_dtw',
    'speeds_lagged_corr',
]
relevant_features = turn_level_features + walking_direction_features + speed_features
In [ ]:
# Pearson correlation between the selected features over every row of df.
corr = df[relevant_features].corr(method='pearson', numeric_only=True)

# Cells whose absolute correlation is below 0.3 are masked out of the heatmap.
mask = corr.abs() < 0.3

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, annot=True, fmt=".2f", mask=mask, ax=ax)
ax.set_title(f"Metrics Correlation Matrix - Original Data (n={len(df)})")
plt.show()
No description has been provided for this image
In [ ]:
# Flag turns that overlap another turn recorded for the same participant_id
# and path_num.
#
# A row is flagged when more than one row of its participant/path group has a
# start_idx, or more than one has an end_idx, inside this row's
# [start_idx, end_idx] range (bounds inclusive). The row itself is part of that
# count whenever its start_idx <= end_idx, hence the "> 1" comparison.
# NOTE(review): a turn lying strictly inside a longer turn is not flagged by
# that longer turn (only the longer one is flagged) — presumably the
# "not subset" in the variable name below; confirm this is intended.
df['overlapping'] = False
for row_label, turn in df.iterrows():
    same_run = (df['participant_id'] == turn['participant_id']) & (df['path_num'] == turn['path_num'])
    starts_inside = df['start_idx'].between(turn['start_idx'], turn['end_idx'])
    ends_inside = df['end_idx'].between(turn['start_idx'], turn['end_idx'])
    if (same_run & starts_inside).sum() > 1 or (same_run & ends_inside).sum() > 1:
        df.at[row_label, 'overlapping'] = True

# Keep only the flagged rows (assign df instead to analyse every turn).
overlapping_and_not_subset = df[df['overlapping']]
oans = overlapping_and_not_subset

# Same heatmap as for the full data, restricted to the flagged rows.
corr_oans = oans[relevant_features].corr(method='pearson', numeric_only=True)
mask = corr_oans.abs() < 0.3
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_oans, annot=True, fmt=".2f", mask=mask, ax=ax)
ax.set_title(f"Metrics Correlation Matrix - Overlapping Data (n={len(oans)})")
plt.show()
No description has been provided for this image
In [ ]:
# Keep rows whose lagged correlation exceeds the threshold for BOTH walking
# direction and speed, then restrict them to the rows flagged as overlapping.
threshold = 0.3
passes_direction = df['walking_direction_lagged_corr'] > threshold
passes_speed = df['speeds_lagged_corr'] > threshold
filtered_df = df[passes_direction & passes_speed]
# Use filtered_df directly instead to skip the overlap restriction.
filtered_oans = filtered_df[filtered_df['overlapping']]

# Pearson correlations on the filtered rows; cells with |r| < 0.3 are masked.
corr_filtered_oans = filtered_oans[relevant_features].corr(method='pearson', numeric_only=True)
mask = corr_filtered_oans.abs() < 0.3
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_filtered_oans, annot=True, fmt=".2f", mask=mask, ax=ax)
ax.set_title(f"Metrics Correlation Matrix - Filtered Overlapping Data (n={len(filtered_oans)})")
plt.show()
No description has been provided for this image
In [ ]:
# Summary statistics of the filtered overlapping turns, one row per column.
filtered_oans.describe().transpose()
Out[ ]:
count mean std min 25% 50% 75% max
participant_id 85.0 1885.729412 573.373276 407.000000 2102.000000 2105.000000 2107.000000 2111.000000
path_num 85.0 2.258824 0.742469 1.000000 2.000000 2.000000 3.000000 3.000000
turn_num 85.0 1.905882 1.436128 1.000000 1.000000 1.000000 2.000000 7.000000
start_idx 85.0 486.105882 471.125451 45.000000 170.000000 372.000000 558.000000 2124.000000
end_idx 85.0 552.423529 488.578974 84.000000 218.000000 431.000000 652.000000 2273.000000
walking_direction_lag 85.0 -6.494118 23.732449 -101.000000 -14.000000 -3.000000 2.000000 46.000000
walking_direction_base_corr 85.0 0.107917 0.459778 -0.857578 -0.333389 0.254710 0.463901 0.864534
walking_direction_lagged_corr 85.0 0.510698 0.139671 0.301173 0.394259 0.473310 0.612160 0.910992
walking_direction_dtw 85.0 56.276960 33.142225 3.665118 29.264177 48.481499 74.596861 155.080360
speeds_lag 85.0 -0.847059 17.121767 -48.000000 -7.000000 -1.000000 2.000000 92.000000
speeds_base_corr 85.0 0.300209 0.329221 -0.632908 0.167255 0.380213 0.531561 0.836185
speeds_lagged_corr 85.0 0.541314 0.121374 0.307027 0.451785 0.532035 0.620851 0.836185
speeds_dtw 85.0 39.709762 17.468461 10.673369 28.068922 38.937636 48.258955 84.791303
mean_distance 85.0 2.140396 0.969318 0.515054 1.262167 1.924040 2.837807 4.393454
mean_speed_difference 85.0 0.352921 0.111447 0.137744 0.273464 0.342893 0.426791 0.741000
mean_walking_direction_difference 85.0 57.025789 16.736801 18.568244 45.656283 57.769570 68.023336 95.338484
mean_pace_asymmetry 85.0 0.394076 0.088883 0.173147 0.333504 0.380928 0.446291 0.662044
turn_duration 85.0 13.263529 5.826312 3.600000 9.000000 12.800000 16.600000 29.800000
normalized_walking_direction_dtw 85.0 0.856659 0.389172 0.203618 0.598522 0.751000 1.097587 1.960544
normalized_speeds_dtw 85.0 0.627214 0.186108 0.256842 0.511719 0.611162 0.723188 1.187326
abs_walking_direction_lag 85.0 16.517647 18.161060 0.000000 3.000000 11.000000 26.000000 101.000000
abs_speeds_lag 85.0 9.576471 14.180282 0.000000 2.000000 4.000000 11.000000 92.000000
In [ ]:
from scipy.stats import pearsonr

# For every feature, draw one figure with a scatter plot and a red regression
# line against each other feature whose absolute correlation in
# corr_filtered_oans exceeds 0.3. Panels are laid out three per row, in the
# order of relevant_features, and each title reports the correlation, the
# two-sided p-value and the 95% confidence interval from scipy's pearsonr.
for feature in relevant_features:
    to_display = []
    for feature2 in relevant_features:
        # Skip the feature itself and any feature whose name is a prefix or
        # suffix of the other (e.g. 'speeds_lag' vs 'abs_speeds_lag').
        related_by_name = (
            feature.startswith(feature2) or feature2.startswith(feature)
            or feature.endswith(feature2) or feature2.endswith(feature)
        )
        if not related_by_name and np.abs(corr_filtered_oans.loc[feature, feature2]) > 0.3:
            to_display.append(feature2)
    if len(to_display) == 0:
        continue

    n_cols = min(len(to_display), 3)
    n_rows = int(np.ceil(len(to_display) / 3))
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows))
    axs = np.atleast_2d(axs)  # Always index as axs[row, col], even for a single subplot

    for i, feature2 in enumerate(to_display):
        # Row-major placement: panel i goes to row i // 3, column i % 3.
        ax = axs[i // 3, i % 3]
        peares = pearsonr(filtered_oans[feature], filtered_oans[feature2], alternative='two-sided')
        p_val = peares.pvalue
        CI = peares.confidence_interval(confidence_level=0.95)
        # Scatter plot
        sns.scatterplot(x=feature, y=feature2, data=filtered_oans, ax=ax)
        # Regression line
        sns.regplot(x=feature, y=feature2, data=filtered_oans, scatter=False, line_kws={'color': 'red'}, ax=ax)
        ax.set_title(f"compared with {feature2}\ncorr: {round(corr_filtered_oans.loc[feature, feature2], 3)}, p_val: {round(p_val,5)}, CI: {[round(c,3) for c in CI]}", fontweight='bold')

    # Hide grid cells left empty when the panel count is not a multiple of 3.
    for unused_ax in axs.flat[len(to_display):]:
        unused_ax.set_visible(False)

    fig.suptitle(f"{feature}'s correlations", fontweight='bold')
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
from PIL import Image
import seaborn as sns
from scipy import stats


def _show_extreme_turn(extreme_row, feature, label):
    """Display the four saved images of one turn side by side.

    Parameters
    ----------
    extreme_row : pandas.DataFrame
        One-row frame holding 'participant_id', 'person_robot', 'path_num',
        'turn_num' and the feature column; the first four values build the
        image directory path under ./turns/.
    feature : str
        Feature name shown in the figure title.
    label : str
        Word placed before "value" in the title (e.g. "highest", "lowest").
    """
    base_path = f"./turns/{extreme_row['participant_id'].values[0]}/{extreme_row['person_robot'].values[0]}/run_{extreme_row['path_num'].values[0]}/turn_{extreme_row['turn_num'].values[0]}/"
    image_names = ("paths.png", "distance.png", "walking_directions.png", "speeds.png")
    fig, axs = plt.subplots(1, 4, figsize=(20, 5))
    for ax, image_name in zip(axs, image_names):
        # Close each image file right after it has been drawn instead of
        # leaving the handle open for the rest of the session.
        with Image.open(base_path + image_name) as img:
            ax.imshow(img)
        ax.axis('off')
    # Flatten {column: {row_label: value}} to {column: value}, rounding floats to 3 decimals.
    to_print_dict = {k: round(v_val, 3) if isinstance(v_val, float) else v_val for k, v in extreme_row.to_dict().items() for v_key, v_val in v.items()}
    to_print_str = ", ".join([f"{k}: {v}" for k, v in to_print_dict.items()])
    fig.suptitle(f"{feature} - {label} value\n {to_print_str}", fontweight='bold')
    plt.tight_layout()
    plt.show()


# For each feature, show the turn with the highest value and the turn with the
# lowest value in filtered_oans.
id_columns = ['participant_id', 'person_robot', 'path_num', 'turn_num']
for feature in relevant_features:
    h_res = filtered_oans.loc[filtered_oans[feature].nlargest(1).index, id_columns + [feature]]
    _show_extreme_turn(h_res, feature, "highest")

    l_res = filtered_oans.loc[filtered_oans[feature].nsmallest(1).index, id_columns + [feature]]
    _show_extreme_turn(l_res, feature, "lowest")

    print("\n\n")
No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


No description has been provided for this image
No description has been provided for this image


In [ ]:
# Histogram with a KDE curve for every feature of filtered_oans, three panels
# per row, in the order of relevant_features.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows))
axs = np.atleast_2d(axs)  # Always index as axs[row, col], even for a single subplot

for i, feature in enumerate(relevant_features):
    # Row-major placement: panel i goes to row i // 3, column i % 3.
    ax = axs[i // 3, i % 3]
    sns.histplot(data=filtered_oans, x=feature, kde=True, ax=ax)
    ax.set_title(feature)
    ax.set_xlabel('')
    ax.set_ylabel('')

# Hide grid cells left empty when the feature count is not a multiple of 3.
for unused_ax in axs.flat[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
import scipy.stats as stats

# Normal probability (Q-Q) plot for every feature of filtered_oans, three
# panels per row, in the order of relevant_features.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows))
axs = np.atleast_2d(axs)  # Always index as axs[row, col], even for a single subplot

for i, feature in enumerate(relevant_features):
    # Row-major placement: panel i goes to row i // 3, column i % 3.
    ax = axs[i // 3, i % 3]
    stats.probplot(filtered_oans[feature], dist="norm", plot=ax)
    ax.set_title(feature)
    ax.set_xlabel('Theoretical Quantiles')
    ax.set_ylabel('Ordered Values')

# Hide grid cells left empty when the feature count is not a multiple of 3.
for unused_ax in axs.flat[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Box plot for every feature of filtered_oans, three panels per row, in the
# order of relevant_features.
n_cols = min(len(relevant_features), 3)
n_rows = int(np.ceil(len(relevant_features) / 3))
fig, axs = plt.subplots(n_rows, n_cols, figsize=(15, 5 * n_rows))
axs = np.atleast_2d(axs)  # Always index as axs[row, col], even for a single subplot

for i, feature in enumerate(relevant_features):
    # Row-major placement: panel i goes to row i // 3, column i % 3.
    ax = axs[i // 3, i % 3]
    sns.boxplot(data=filtered_oans, y=feature, ax=ax)
    ax.set_title(feature)
    ax.set_xlabel('')
    ax.set_ylabel('')

# Hide grid cells left empty when the feature count is not a multiple of 3.
for unused_ax in axs.flat[len(relevant_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
# Count the turns in filtered_oans by the sign of each lag column and draw one
# bar chart per column.
#
# The categories are always reported in the fixed order Negative / Zero /
# Positive, and a category with no rows is shown as 0, so the two charts can
# be compared bar by bar. Missing lags are dropped before counting; they are
# not counted as 'Positive'.
lag_sign_counts = {}
for lag_col in ('speeds_lag', 'walking_direction_lag'):
    lag_values = filtered_oans[lag_col].dropna()
    lag_sign_counts[lag_col] = pd.Series(
        {
            'Negative': int((lag_values < 0).sum()),
            'Zero': int((lag_values == 0).sum()),
            'Positive': int((lag_values > 0).sum()),
        },
        name='count',
    )

# Count for 'speeds_lag'
speeds_lag_counts = lag_sign_counts['speeds_lag']

# Count for 'walking_direction_lag' (the historical variable name is kept so
# that any code relying on it keeps working).
walking_speed_lag_counts = lag_sign_counts['walking_direction_lag']

fig, ax = plt.subplots()
speeds_lag_counts.plot.bar(ax=ax)
ax.set_title("Counts for 'speeds_lag'")
ax.set_xlabel("Speeds Lag Category")
ax.set_ylabel("Count")
plt.show()

fig, ax = plt.subplots()
walking_speed_lag_counts.plot.bar(ax=ax)
ax.set_title("Counts for 'walking_direction_lag'")
ax.set_xlabel("Walking Direction Lag Category")
ax.set_ylabel("Count")
plt.show()
No description has been provided for this image
No description has been provided for this image